# 调整mysql中blog的分类id和作者id，转换作者和分类数据到各自的表中
import pymysql
# 连接mysql
from DBUtils.PersistentDB import PersistentDB
pool1 = PersistentDB(creator=pymysql, maxusage=None, ping=0, closeable=False, host='127.0.0.1', user='root',
                         password='root', db='blog_spider', charset='utf8')
conn = pool1.connection()
cursor = conn.cursor()
# 分类数据唯一列表提取
category_sql='select category from blog_copy group by category'
cursor.execute(category_sql)
result1=cursor.fetchall()
category_list=set([item[0].strip().split(',')[0] for item in result1])
# 作者数据唯一列表提取
author_sql='select author from blog_copy group by author'
cursor.execute(author_sql)
result2=cursor.fetchall()
author_list=set([item[0].strip() for item in result2])
print(author_list)

# 进行数据插入
#category表
# for one in category_list:
#     cursor.execute('insert into category values(NULL,%s)',(one.strip(),))
#     conn.commit()
#
# for two in author_list:
#     cursor.execute('insert into author values(NULL,%s)', (two.strip(),))
#     conn.commit()

# 替换blog表中category和author为id的模式
# 构建category字典
cursor.execute('select * from category')
result1=cursor.fetchall()
category_dict={item[1].strip():item[0] for item in result1}
# author字典
cursor.execute('select * from author')
result2=cursor.fetchall()
author_dict={item[1]:item[0] for item in result2}

cursor.execute('select * from blog_copy')
result=cursor.fetchall()
print(category_dict)
print(author_dict)
for item in result:
    sql='update blog set category=%s,author=%s where id=%s'
    id=item[0]
    category_id=category_dict[item[4].strip().split(',')[0].strip()]
    author_id=author_dict[item[3].strip()]
    print(category_id,author_id)
    cursor.execute(sql,(category_id,author_id,id))
    conn.commit()
